{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the true-positive and true-negative feature files, then stack them\n",
    "# into one frame whose index is renumbered from 0.\n",
    "tp_df = pd.read_csv(\"dataset/train_true_positive_features.csv\")\n",
    "tn_df = pd.read_csv(\"dataset/train_true_negative_features.csv\")\n",
    "df = pd.concat([tp_df, tn_df], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Narrow the frame to the six feature columns plus the is_related label.\n",
    "df = df[[\n",
    "    'vgg_cosine', 'vgg_euclidean_l2',\n",
    "    'facenet_cosine', 'facenet_euclidean_l2',\n",
    "    'openface_cosine', 'openface_euclidean_l2',\n",
    "    'is_related',\n",
    "]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>vgg_cosine</th>\n",
       "      <th>vgg_euclidean_l2</th>\n",
       "      <th>facenet_cosine</th>\n",
       "      <th>facenet_euclidean_l2</th>\n",
       "      <th>openface_cosine</th>\n",
       "      <th>openface_euclidean_l2</th>\n",
       "      <th>is_related</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>101280</th>\n",
       "      <td>0.502680</td>\n",
       "      <td>1.002676</td>\n",
       "      <td>0.678233</td>\n",
       "      <td>1.164675</td>\n",
       "      <td>1.178907</td>\n",
       "      <td>1.535517</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>358811</th>\n",
       "      <td>0.529600</td>\n",
       "      <td>1.029175</td>\n",
       "      <td>0.819899</td>\n",
       "      <td>1.280546</td>\n",
       "      <td>0.645098</td>\n",
       "      <td>1.135868</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125682</th>\n",
       "      <td>0.586000</td>\n",
       "      <td>1.082590</td>\n",
       "      <td>0.721757</td>\n",
       "      <td>1.201464</td>\n",
       "      <td>0.964883</td>\n",
       "      <td>1.389160</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152342</th>\n",
       "      <td>0.434440</td>\n",
       "      <td>0.932137</td>\n",
       "      <td>0.399840</td>\n",
       "      <td>0.894248</td>\n",
       "      <td>0.302438</td>\n",
       "      <td>0.777737</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100969</th>\n",
       "      <td>0.412297</td>\n",
       "      <td>0.908071</td>\n",
       "      <td>0.600329</td>\n",
       "      <td>1.095746</td>\n",
       "      <td>0.386677</td>\n",
       "      <td>0.879406</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        vgg_cosine  vgg_euclidean_l2  facenet_cosine  facenet_euclidean_l2  \\\n",
       "101280    0.502680          1.002676        0.678233              1.164675   \n",
       "358811    0.529600          1.029175        0.819899              1.280546   \n",
       "125682    0.586000          1.082590        0.721757              1.201464   \n",
       "152342    0.434440          0.932137        0.399840              0.894248   \n",
       "100969    0.412297          0.908071        0.600329              1.095746   \n",
       "\n",
       "        openface_cosine  openface_euclidean_l2  is_related  \n",
       "101280         1.178907               1.535517           1  \n",
       "358811         0.645098               1.135868           0  \n",
       "125682         0.964883               1.389160           1  \n",
       "152342         0.302438               0.777737           1  \n",
       "100969         0.386677               0.879406           1  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train Test Split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 15% of the rows for testing. The whole frame is split at once,\n",
    "# so both x_train and x_test still contain the is_related label column.\n",
    "x_train, x_test = train_test_split(\n",
    "    df,\n",
    "    test_size=0.15,\n",
    "    random_state=17,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# H2O AutoML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import h2o\n",
    "from h2o.automl import H2OAutoML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'3.26.0.3'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "h2o.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# specify maximum memory size and number of threads based on your system"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "72"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import multiprocessing\n",
    "multiprocessing.cpu_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "svmem(total=1622764929024, available=669890707456, percent=58.7, used=949029715968, free=574988288000, active=933365055488, inactive=94913318912, buffers=0, cached=98746925056, shared=961851392, slab=2740187136)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import psutil\n",
    "psutil.virtual_memory()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking whether there is an H2O instance running at http://127.0.0.1:54321 . connected.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime:</td>\n",
       "<td>6 mins 51 secs</td></tr>\n",
       "<tr><td>H2O cluster timezone:</td>\n",
       "<td>Europe/Istanbul</td></tr>\n",
       "<tr><td>H2O data parsing timezone:</td>\n",
       "<td>UTC</td></tr>\n",
       "<tr><td>H2O cluster version:</td>\n",
       "<td>3.26.0.3</td></tr>\n",
       "<tr><td>H2O cluster version age:</td>\n",
       "<td>11 days </td></tr>\n",
       "<tr><td>H2O cluster name:</td>\n",
       "<td>H2O_from_python_sefik_xmj4a9</td></tr>\n",
       "<tr><td>H2O cluster total nodes:</td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O cluster free memory:</td>\n",
       "<td>177.0 Gb</td></tr>\n",
       "<tr><td>H2O cluster total cores:</td>\n",
       "<td>72</td></tr>\n",
       "<tr><td>H2O cluster allowed cores:</td>\n",
       "<td>10</td></tr>\n",
       "<tr><td>H2O cluster status:</td>\n",
       "<td>locked, healthy</td></tr>\n",
       "<tr><td>H2O connection url:</td>\n",
       "<td>http://127.0.0.1:54321</td></tr>\n",
       "<tr><td>H2O connection proxy:</td>\n",
       "<td>None</td></tr>\n",
       "<tr><td>H2O internal security:</td>\n",
       "<td>False</td></tr>\n",
       "<tr><td>H2O API Extensions:</td>\n",
       "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4</td></tr>\n",
       "<tr><td>Python version:</td>\n",
       "<td>3.6.3 final</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  ---------------------------------------------------\n",
       "H2O cluster uptime:         6 mins 51 secs\n",
       "H2O cluster timezone:       Europe/Istanbul\n",
       "H2O data parsing timezone:  UTC\n",
       "H2O cluster version:        3.26.0.3\n",
       "H2O cluster version age:    11 days\n",
       "H2O cluster name:           H2O_from_python_sefik_xmj4a9\n",
       "H2O cluster total nodes:    1\n",
       "H2O cluster free memory:    177.0 Gb\n",
       "H2O cluster total cores:    72\n",
       "H2O cluster allowed cores:  10\n",
       "H2O cluster status:         locked, healthy\n",
       "H2O connection url:         http://127.0.0.1:54321\n",
       "H2O connection proxy:\n",
       "H2O internal security:      False\n",
       "H2O API Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4\n",
       "Python version:             3.6.3 final\n",
       "--------------------------  ---------------------------------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Connect to the H2O server on localhost (h2o.init first checks for a running\n",
    "# instance). The memory (GB) and thread limits are machine-specific; tune them.\n",
    "h2o.init(\n",
    "    ip=\"127.0.0.1\",\n",
    "    max_mem_size_GB=200,\n",
    "    nthreads=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "#pandas to h2o frame\n",
    "hf = h2o.H2OFrame(x_train)\n",
    "\n",
    "# Alternatively, load the data directly into an H2OFrame instead of converting from pandas:\n",
    "#hf = h2o.import_file('dataset/x_train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th style=\"text-align: right;\">  vgg_cosine</th><th style=\"text-align: right;\">  vgg_euclidean_l2</th><th style=\"text-align: right;\">  facenet_cosine</th><th style=\"text-align: right;\">  facenet_euclidean_l2</th><th style=\"text-align: right;\">  openface_cosine</th><th style=\"text-align: right;\">  openface_euclidean_l2</th><th style=\"text-align: right;\">  is_related</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td style=\"text-align: right;\">    0.461026</td><td style=\"text-align: right;\">          0.960236</td><td style=\"text-align: right;\">        0.759346</td><td style=\"text-align: right;\">              1.23235 </td><td style=\"text-align: right;\">         0.380667</td><td style=\"text-align: right;\">               0.872545</td><td style=\"text-align: right;\">           1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.523298</td><td style=\"text-align: right;\">          1.02303 </td><td style=\"text-align: right;\">        0.820089</td><td style=\"text-align: right;\">              1.28069 </td><td style=\"text-align: right;\">         0.572227</td><td style=\"text-align: right;\">               1.06979 </td><td style=\"text-align: right;\">           1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.57931 </td><td style=\"text-align: right;\">          1.07639 </td><td style=\"text-align: right;\">        0.834568</td><td style=\"text-align: right;\">              1.29195 </td><td style=\"text-align: right;\">         0.411612</td><td style=\"text-align: right;\">               0.907317</td><td style=\"text-align: right;\">           1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.661336</td><td style=\"text-align: right;\">          1.15007 </td><td style=\"text-align: right;\">        1.25314 </td><td style=\"text-align: right;\">              1.58312 </td><td style=\"text-align: right;\">         0.809752</td><td style=\"text-align: right;\">               1.2726  </td><td style=\"text-align: right;\">           0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.380073</td><td style=\"text-align: right;\">          0.871864</td><td style=\"text-align: right;\">        0.419412</td><td style=\"text-align: right;\">              0.915873</td><td style=\"text-align: right;\">         0.386296</td><td style=\"text-align: right;\">               0.878972</td><td style=\"text-align: right;\">           1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.586928</td><td style=\"text-align: right;\">          1.08345 </td><td style=\"text-align: right;\">        0.980849</td><td style=\"text-align: right;\">              1.40061 </td><td style=\"text-align: right;\">         0.791823</td><td style=\"text-align: right;\">               1.25843 </td><td style=\"text-align: right;\">           0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.543978</td><td style=\"text-align: right;\">          1.04305 </td><td style=\"text-align: right;\">        0.900194</td><td style=\"text-align: right;\">              1.34179 </td><td style=\"text-align: right;\">         0.88473 </td><td style=\"text-align: right;\">               1.33021 </td><td style=\"text-align: right;\">           1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.378311</td><td style=\"text-align: right;\">          0.86984 </td><td style=\"text-align: right;\">        0.799367</td><td style=\"text-align: right;\">              1.26441 </td><td style=\"text-align: right;\">         0.467552</td><td style=\"text-align: right;\">               0.967008</td><td style=\"text-align: right;\">           0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.483638</td><td style=\"text-align: right;\">          0.983502</td><td style=\"text-align: right;\">        0.590177</td><td style=\"text-align: right;\">              1.08644 </td><td style=\"text-align: right;\">         0.411636</td><td style=\"text-align: right;\">               0.907343</td><td style=\"text-align: right;\">           1</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    0.874395</td><td style=\"text-align: right;\">          1.32242 </td><td style=\"text-align: right;\">        0.935428</td><td style=\"text-align: right;\">              1.36779 </td><td style=\"text-align: right;\">         1.11803 </td><td style=\"text-align: right;\">               1.49535 </td><td style=\"text-align: right;\">           0</td></tr>\n",
       "</tbody>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the target column; every other column of df is a predictor.\n",
    "y_label = \"is_related\"\n",
    "x_labels = df.drop(columns=[y_label]).columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['vgg_cosine',\n",
       " 'vgg_euclidean_l2',\n",
       " 'facenet_cosine',\n",
       " 'facenet_euclidean_l2',\n",
       " 'openface_cosine',\n",
       " 'openface_euclidean_l2']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This is a binary classification problem, so convert the is_related column to an enum (categorical) type.\n",
    "# If it were left numeric, this would be treated as a regression problem.\n",
    "hf[y_label] = hf[y_label].asfactor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#hf.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cap the AutoML search at two hours of runtime (60 s * 60 min * 2).\n",
    "# A fixed seed (same value as the train/test split) pins AutoML's random choices.\n",
    "# NOTE(review): H2O documents that exact reproducibility is not guaranteed for\n",
    "# time-limited runs or Deep Learning models -- use max_models if that matters.\n",
    "aml = H2OAutoML(max_runtime_secs=60*60*2, seed=17)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AutoML progress: |████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "aml.train(x = x_labels, y = y_label, training_frame = hf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Leaderboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "lb = aml.leaderboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th>model_id                                           </th><th style=\"text-align: right;\">     auc</th><th style=\"text-align: right;\">  logloss</th><th style=\"text-align: right;\">  mean_per_class_error</th><th style=\"text-align: right;\">    rmse</th><th style=\"text-align: right;\">     mse</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>StackedEnsemble_BestOfFamily_AutoML_20190904_051328</td><td style=\"text-align: right;\">0.724967</td><td style=\"text-align: right;\"> 0.581732</td><td style=\"text-align: right;\">              0.341478</td><td style=\"text-align: right;\">0.445439</td><td style=\"text-align: right;\">0.198416</td></tr>\n",
       "<tr><td>StackedEnsemble_AllModels_AutoML_20190904_051328   </td><td style=\"text-align: right;\">0.724951</td><td style=\"text-align: right;\"> 0.581743</td><td style=\"text-align: right;\">              0.344451</td><td style=\"text-align: right;\">0.445446</td><td style=\"text-align: right;\">0.198422</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_6      </td><td style=\"text-align: right;\">0.724634</td><td style=\"text-align: right;\"> 0.581235</td><td style=\"text-align: right;\">              0.344413</td><td style=\"text-align: right;\">0.445333</td><td style=\"text-align: right;\">0.198321</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_7      </td><td style=\"text-align: right;\">0.724633</td><td style=\"text-align: right;\"> 0.581262</td><td style=\"text-align: right;\">              0.343124</td><td style=\"text-align: right;\">0.445342</td><td style=\"text-align: right;\">0.19833 </td></tr>\n",
       "<tr><td>XGBoost_3_AutoML_20190904_051328                   </td><td style=\"text-align: right;\">0.724627</td><td style=\"text-align: right;\"> 0.581256</td><td style=\"text-align: right;\">              0.343848</td><td style=\"text-align: right;\">0.44534 </td><td style=\"text-align: right;\">0.198327</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_11     </td><td style=\"text-align: right;\">0.724596</td><td style=\"text-align: right;\"> 0.581288</td><td style=\"text-align: right;\">              0.344023</td><td style=\"text-align: right;\">0.445352</td><td style=\"text-align: right;\">0.198339</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_3          </td><td style=\"text-align: right;\">0.724551</td><td style=\"text-align: right;\"> 0.581298</td><td style=\"text-align: right;\">              0.34251 </td><td style=\"text-align: right;\">0.445363</td><td style=\"text-align: right;\">0.198348</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_8      </td><td style=\"text-align: right;\">0.724526</td><td style=\"text-align: right;\"> 0.58139 </td><td style=\"text-align: right;\">              0.343837</td><td style=\"text-align: right;\">0.445379</td><td style=\"text-align: right;\">0.198362</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_12     </td><td style=\"text-align: right;\">0.724495</td><td style=\"text-align: right;\"> 0.581425</td><td style=\"text-align: right;\">              0.343161</td><td style=\"text-align: right;\">0.445397</td><td style=\"text-align: right;\">0.198378</td></tr>\n",
       "<tr><td>GBM_1_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.724479</td><td style=\"text-align: right;\"> 0.581346</td><td style=\"text-align: right;\">              0.344121</td><td style=\"text-align: right;\">0.445382</td><td style=\"text-align: right;\">0.198365</td></tr>\n",
       "<tr><td>GBM_2_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.724332</td><td style=\"text-align: right;\"> 0.581447</td><td style=\"text-align: right;\">              0.344268</td><td style=\"text-align: right;\">0.445442</td><td style=\"text-align: right;\">0.198419</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_16         </td><td style=\"text-align: right;\">0.724257</td><td style=\"text-align: right;\"> 0.581578</td><td style=\"text-align: right;\">              0.342637</td><td style=\"text-align: right;\">0.445468</td><td style=\"text-align: right;\">0.198442</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_6          </td><td style=\"text-align: right;\">0.724183</td><td style=\"text-align: right;\"> 0.581906</td><td style=\"text-align: right;\">              0.343913</td><td style=\"text-align: right;\">0.44556 </td><td style=\"text-align: right;\">0.198523</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_10         </td><td style=\"text-align: right;\">0.724114</td><td style=\"text-align: right;\"> 0.581802</td><td style=\"text-align: right;\">              0.343412</td><td style=\"text-align: right;\">0.445555</td><td style=\"text-align: right;\">0.198519</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_13         </td><td style=\"text-align: right;\">0.723948</td><td style=\"text-align: right;\"> 0.58566 </td><td style=\"text-align: right;\">              0.342898</td><td style=\"text-align: right;\">0.447042</td><td style=\"text-align: right;\">0.199846</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_14         </td><td style=\"text-align: right;\">0.723926</td><td style=\"text-align: right;\"> 0.581987</td><td style=\"text-align: right;\">              0.344366</td><td style=\"text-align: right;\">0.445623</td><td style=\"text-align: right;\">0.19858 </td></tr>\n",
       "<tr><td>GBM_3_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.723867</td><td style=\"text-align: right;\"> 0.581787</td><td style=\"text-align: right;\">              0.343686</td><td style=\"text-align: right;\">0.445606</td><td style=\"text-align: right;\">0.198565</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_15         </td><td style=\"text-align: right;\">0.723632</td><td style=\"text-align: right;\"> 0.585257</td><td style=\"text-align: right;\">              0.343069</td><td style=\"text-align: right;\">0.446869</td><td style=\"text-align: right;\">0.199692</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_12         </td><td style=\"text-align: right;\">0.723399</td><td style=\"text-align: right;\"> 0.584497</td><td style=\"text-align: right;\">              0.343891</td><td style=\"text-align: right;\">0.446658</td><td style=\"text-align: right;\">0.199503</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_4 </td><td style=\"text-align: right;\">0.723317</td><td style=\"text-align: right;\"> 0.585351</td><td style=\"text-align: right;\">              0.344973</td><td style=\"text-align: right;\">0.446426</td><td style=\"text-align: right;\">0.199296</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_7 </td><td style=\"text-align: right;\">0.723276</td><td style=\"text-align: right;\"> 0.582969</td><td style=\"text-align: right;\">              0.343778</td><td style=\"text-align: right;\">0.445987</td><td style=\"text-align: right;\">0.198905</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_9      </td><td style=\"text-align: right;\">0.723159</td><td style=\"text-align: right;\"> 0.582236</td><td style=\"text-align: right;\">              0.346331</td><td style=\"text-align: right;\">0.44582 </td><td style=\"text-align: right;\">0.198756</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_2 </td><td style=\"text-align: right;\">0.72315 </td><td style=\"text-align: right;\"> 0.584106</td><td style=\"text-align: right;\">              0.342195</td><td style=\"text-align: right;\">0.446307</td><td style=\"text-align: right;\">0.19919 </td></tr>\n",
       "<tr><td>DeepLearning_1_AutoML_20190904_051328              </td><td style=\"text-align: right;\">0.72307 </td><td style=\"text-align: right;\"> 0.582706</td><td style=\"text-align: right;\">              0.344028</td><td style=\"text-align: right;\">0.445949</td><td style=\"text-align: right;\">0.19887 </td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_7          </td><td style=\"text-align: right;\">0.722854</td><td style=\"text-align: right;\"> 0.586061</td><td style=\"text-align: right;\">              0.344379</td><td style=\"text-align: right;\">0.447315</td><td style=\"text-align: right;\">0.200091</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_1 </td><td style=\"text-align: right;\">0.722711</td><td style=\"text-align: right;\"> 0.585214</td><td style=\"text-align: right;\">              0.345865</td><td style=\"text-align: right;\">0.446652</td><td style=\"text-align: right;\">0.199498</td></tr>\n",
       "<tr><td>XGBoost_1_AutoML_20190904_051328                   </td><td style=\"text-align: right;\">0.722613</td><td style=\"text-align: right;\"> 0.582664</td><td style=\"text-align: right;\">              0.347477</td><td style=\"text-align: right;\">0.446003</td><td style=\"text-align: right;\">0.198918</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_10     </td><td style=\"text-align: right;\">0.722158</td><td style=\"text-align: right;\"> 0.583113</td><td style=\"text-align: right;\">              0.345538</td><td style=\"text-align: right;\">0.446189</td><td style=\"text-align: right;\">0.199085</td></tr>\n",
       "<tr><td>GLM_grid_1_AutoML_20190904_051328_model_1          </td><td style=\"text-align: right;\">0.721808</td><td style=\"text-align: right;\"> 0.583937</td><td style=\"text-align: right;\">              0.343209</td><td style=\"text-align: right;\">0.44642 </td><td style=\"text-align: right;\">0.199291</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_3 </td><td style=\"text-align: right;\">0.721694</td><td style=\"text-align: right;\"> 0.586537</td><td style=\"text-align: right;\">              0.347253</td><td style=\"text-align: right;\">0.447041</td><td style=\"text-align: right;\">0.199846</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_5 </td><td style=\"text-align: right;\">0.72164 </td><td style=\"text-align: right;\"> 0.584206</td><td style=\"text-align: right;\">              0.34734 </td><td style=\"text-align: right;\">0.446486</td><td style=\"text-align: right;\">0.19935 </td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_8          </td><td style=\"text-align: right;\">0.721626</td><td style=\"text-align: right;\"> 0.583543</td><td style=\"text-align: right;\">              0.348005</td><td style=\"text-align: right;\">0.446399</td><td style=\"text-align: right;\">0.199272</td></tr>\n",
       "<tr><td>GBM_4_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.721412</td><td style=\"text-align: right;\"> 0.583591</td><td style=\"text-align: right;\">              0.346227</td><td style=\"text-align: right;\">0.446465</td><td style=\"text-align: right;\">0.199331</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_3      </td><td style=\"text-align: right;\">0.721218</td><td style=\"text-align: right;\"> 0.583861</td><td style=\"text-align: right;\">              0.34414 </td><td style=\"text-align: right;\">0.446531</td><td style=\"text-align: right;\">0.19939 </td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_9          </td><td style=\"text-align: right;\">0.720238</td><td style=\"text-align: right;\"> 0.589392</td><td style=\"text-align: right;\">              0.344588</td><td style=\"text-align: right;\">0.448857</td><td style=\"text-align: right;\">0.201472</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_2      </td><td style=\"text-align: right;\">0.719538</td><td style=\"text-align: right;\"> 0.584869</td><td style=\"text-align: right;\">              0.345875</td><td style=\"text-align: right;\">0.447062</td><td style=\"text-align: right;\">0.199864</td></tr>\n",
       "<tr><td>GBM_5_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.718991</td><td style=\"text-align: right;\"> 0.585331</td><td style=\"text-align: right;\">              0.347423</td><td style=\"text-align: right;\">0.447233</td><td style=\"text-align: right;\">0.200018</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_2          </td><td style=\"text-align: right;\">0.717778</td><td style=\"text-align: right;\"> 0.617341</td><td style=\"text-align: right;\">              0.345521</td><td style=\"text-align: right;\">0.462239</td><td style=\"text-align: right;\">0.213665</td></tr>\n",
       "<tr><td>XGBoost_2_AutoML_20190904_051328                   </td><td style=\"text-align: right;\">0.717373</td><td style=\"text-align: right;\"> 0.586632</td><td style=\"text-align: right;\">              0.350243</td><td style=\"text-align: right;\">0.447829</td><td style=\"text-align: right;\">0.20055 </td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_1      </td><td style=\"text-align: right;\">0.717272</td><td style=\"text-align: right;\"> 0.586681</td><td style=\"text-align: right;\">              0.347522</td><td style=\"text-align: right;\">0.447861</td><td style=\"text-align: right;\">0.200579</td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_5      </td><td style=\"text-align: right;\">0.716173</td><td style=\"text-align: right;\"> 0.587585</td><td style=\"text-align: right;\">              0.352185</td><td style=\"text-align: right;\">0.448278</td><td style=\"text-align: right;\">0.200953</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_5          </td><td style=\"text-align: right;\">0.71384 </td><td style=\"text-align: right;\"> 0.590447</td><td style=\"text-align: right;\">              0.354189</td><td style=\"text-align: right;\">0.449405</td><td style=\"text-align: right;\">0.201965</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_11         </td><td style=\"text-align: right;\">0.712899</td><td style=\"text-align: right;\"> 0.590345</td><td style=\"text-align: right;\">              0.353554</td><td style=\"text-align: right;\">0.4496  </td><td style=\"text-align: right;\">0.20214 </td></tr>\n",
       "<tr><td>XGBoost_grid_1_AutoML_20190904_051328_model_4      </td><td style=\"text-align: right;\">0.712031</td><td style=\"text-align: right;\"> 0.591519</td><td style=\"text-align: right;\">              0.352147</td><td style=\"text-align: right;\">0.450112</td><td style=\"text-align: right;\">0.202601</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_8 </td><td style=\"text-align: right;\">0.710324</td><td style=\"text-align: right;\"> 0.602387</td><td style=\"text-align: right;\">              0.347693</td><td style=\"text-align: right;\">0.452957</td><td style=\"text-align: right;\">0.20517 </td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_4          </td><td style=\"text-align: right;\">0.703282</td><td style=\"text-align: right;\"> 0.601399</td><td style=\"text-align: right;\">              0.363952</td><td style=\"text-align: right;\">0.453902</td><td style=\"text-align: right;\">0.206027</td></tr>\n",
       "<tr><td>DeepLearning_grid_1_AutoML_20190904_051328_model_6 </td><td style=\"text-align: right;\">0.702775</td><td style=\"text-align: right;\"> 0.599338</td><td style=\"text-align: right;\">              0.35899 </td><td style=\"text-align: right;\">0.453422</td><td style=\"text-align: right;\">0.205592</td></tr>\n",
       "<tr><td>DRF_1_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.702123</td><td style=\"text-align: right;\"> 0.604211</td><td style=\"text-align: right;\">              0.367061</td><td style=\"text-align: right;\">0.454424</td><td style=\"text-align: right;\">0.206501</td></tr>\n",
       "<tr><td>XRT_1_AutoML_20190904_051328                       </td><td style=\"text-align: right;\">0.702038</td><td style=\"text-align: right;\"> 0.604509</td><td style=\"text-align: right;\">              0.36651 </td><td style=\"text-align: right;\">0.454458</td><td style=\"text-align: right;\">0.206532</td></tr>\n",
       "<tr><td>GBM_grid_1_AutoML_20190904_051328_model_1          </td><td style=\"text-align: right;\">0.700386</td><td style=\"text-align: right;\"> 0.603128</td><td style=\"text-align: right;\">              0.364302</td><td style=\"text-align: right;\">0.454868</td><td style=\"text-align: right;\">0.206905</td></tr>\n",
       "</tbody>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every row of lb (presumably the AutoML leaderboard) instead of the default preview.\n",
    "lb.head(lb.nrows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the AutoML leader to disk (force=True overwrites an existing copy);\n",
    "# save_model returns the location it wrote to.\n",
    "saved_model = h2o.save_model(model=aml.leader, path=\"\", force=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/outputs/sefik/kinship/StackedEnsemble_BestOfFamily_AutoML_20190904_051328'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the value returned by h2o.save_model above.\n",
    "saved_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
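    "# To skip AutoML training, the saved best model can be loaded directly with h2o.load_model.\n",
    "# Note: this rebinds `aml` to a single H2O model, not an H2OAutoML object.\n",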
    "#aml = h2o.load_model(\"/outputs/sefik/kinship/StackedEnsemble_BestOfFamily_AutoML_20190904_051328\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Validation Score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "# Convert x_test (defined earlier) into an H2OFrame so it can be scored by H2O.\n",
    "hf_val = h2o.H2OFrame(python_obj=x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model_performance is a method of H2O models, not of the H2OAutoML object,\n",
    "# so score the leader model. If `aml` was rebound to a model loaded with\n",
    "# h2o.load_model, it has no `.leader` attribute and is used directly.\n",
    "best_model = getattr(aml, \"leader\", aml)\n",
    "perf = best_model.model_performance(hf_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7264606340321227\n"
     ]
    }
   ],
   "source": [
    "# AUC from the performance metrics computed on hf_val.\n",
    "perf.auc()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.697363336404982\n"
     ]
    }
   ],
   "source": [
    "# accuracy() yields [threshold, accuracy] pairs (per the H2O docs);\n",
    "# show the accuracy value of the first pair.\n",
    "perf.accuracy()[0][1]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
